import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from scipy.optimize import minimize

# CSV inputs that are stacked into one combined frame
file_paths = [
    r"C:/Users/HP/Desktop/quantitative_easing_financial_data.csv",
    r"C:/Users/HP/Desktop/processed_brexit_features.csv",
    r"C:/Users/HP/Desktop/processed_covid_19_crisis_features.csv",
    r"C:/Users/HP/Desktop/processed_dodd_frank_act_features.csv",
    r"C:/Users/HP/Desktop/processed_eurozone_debt_crisis_features.csv",
    r"C:/Users/HP/Desktop/processed_global_financial_crisis_features.csv",
    r"C:/Users/HP/Desktop/processed_quantitative_easing_features.csv"
]

# Read every CSV, then stack the frames row-wise under a fresh 0..n-1 index
dfs = list(map(pd.read_csv, file_paths))
data = pd.concat(dfs, ignore_index=True)

# Pick the first candidate name that is actually a column of the combined frame
date_column_candidates = ['Date', 'date', 'Timestamp', 'timestamp']
date_column = next(
    (name for name in date_column_candidates if name in data.columns),
    None,
)

# Without a date column the frame cannot be indexed by time, so stop here
if date_column is None:
    raise ValueError(f"No date column found. Please ensure one of {date_column_candidates} exists in the dataset.")

# Parse the date column; values that cannot be parsed become NaT
data[date_column] = pd.to_datetime(data[date_column], errors='coerce')

# Drop rows whose date is NaT, index the frame by date and sort it by that index
data = (
    data.dropna(subset=[date_column])
    .set_index(date_column)
    .sort_index()
)

# Handle missing values by carrying the last valid observation forward.
# DataFrame.ffill() is the direct replacement for fillna(method='ffill'),
# whose `method` keyword is deprecated since pandas 2.1.
# Rows that precede a column's first valid observation have nothing to
# carry forward and therefore remain NaN.
data.ffill(inplace=True)

# Set the target variable
target_variable = 'Stock_Index'  # Replace with your actual target variable

# Fail early with a clear message if the combined frame lacks the target column
if target_variable not in data.columns:
    raise ValueError(f"Target variable '{target_variable}' not found in dataset.")

# Seasonal decomposition (additive model, 24 observations per seasonal cycle).
# seasonal_decompose raises on missing values, and forward filling leaves NaNs
# before the first valid observation, so drop them first -- the ADF test and
# the ARIMA fit in this script already do the same.
result = seasonal_decompose(data[target_variable].dropna(), model='additive', period=24)  # Adjust period if needed
# NOTE(review): the figure returned by plot() is discarded; when run as a plain
# script an explicit show/save call may be needed to actually see it -- confirm.
result.plot()

# Augmented Dickey-Fuller stationarity test on the non-missing target values
adf_result = adfuller(data[target_variable].dropna())
# Report the entries of the result tuple that matter here, by position
for label, position in (
    ("ADF Test Statistic:", 0),
    ("p-value:", 1),
    ("Critical Values:", 4),
):
    print(label, adf_result[position])

# Fit an ARIMA model to the same non-missing series and print its summary
arima_order = (1, 1, 1)  # Replace with your desired order
arima_model = ARIMA(endog=data[target_variable].dropna(), order=arima_order)
arima_fit = arima_model.fit()
print(arima_fit.summary())

# Maximum Likelihood Estimation Example
def log_likelihood(params, series=None):
    """Return the negative Gaussian log-likelihood of a sample.

    Despite its name, the function returns the *negative* log-likelihood so
    that it can be handed directly to a minimiser.

    Args:
        params: Pair ``(mu, sigma)`` -- mean and standard deviation of the
            normal distribution being evaluated.
        series: Observations to score (any 1-D array-like of numbers).
            Defaults to the module-level ``data[target_variable]`` so that
            existing one-argument calls keep working. NaN entries are ignored.

    Returns:
        The negative log-likelihood of the non-missing observations.
    """
    mu, sigma = params
    if series is None:
        # Backward-compatible default: score the script's target column.
        series = data[target_variable]

    # Drop NaNs explicitly so plain lists/arrays behave like a pandas Series
    # (whose sum skips NaN by default).
    values = np.asarray(series, dtype=float)
    values = values[~np.isnan(values)]

    residuals = values - mu
    variance = sigma ** 2
    # The log term is identical for every observation, so count it once per
    # observation instead of broadcasting it across the sample.
    ll = -0.5 * (values.size * np.log(2 * np.pi * variance) + np.sum(residuals ** 2) / variance)
    return -ll  # Negative log likelihood for minimization

# Start the optimiser from the sample moments and leave mu unbounded.
# A fixed start of [0, 1] with mu limited to [-10, 10] and sigma capped at 10
# pins the estimate to a bound whenever the target is not on a standardised
# scale (e.g. raw index levels).
observed = data[target_variable].dropna()
sigma_floor = 1e-5  # lower bound that keeps sigma strictly positive
initial_params = [observed.mean(), max(observed.std(ddof=0), sigma_floor)]  # Initial guesses for mu and sigma
mle_result = minimize(log_likelihood, initial_params, bounds=[(None, None), (sigma_floor, None)])
print("MLE Results:", mle_result)

# Write the combined, date-indexed frame to disk for later steps
data.to_csv(path_or_buf=r"C:/Users/HP/Desktop/processed_combined_data.csv")
